import sys

def _is_ascii_letter(ch):
    """Return True if the single character *ch* is one of a-z or A-Z."""
    return 'a' <= ch <= 'z' or 'A' <= ch <= 'Z'


def normalize_context(context):
    """Return *context* re-spaced so every non-letter symbol stands alone.

    Rules, applied character by character:

    * Text between ``[`` and ``]`` is copied verbatim, and the bracketed
      group as a whole is separated from its neighbours by spaces.
    * Outside brackets, ASCII letters are copied as-is, so runs of letters
      stay together as one token.
    * Outside brackets, every other character (digits, punctuation,
      non-ASCII characters, ...) is surrounded by spaces so that it becomes
      a token of its own.

    Finally all runs of whitespace are collapsed to single spaces and the
    result is stripped at both ends.
    """
    pieces = []
    inside_brackets = False
    for ch in context:
        if ch == '[':
            inside_brackets = True
            pieces.append(' [')
        elif ch == ']':
            inside_brackets = False
            pieces.append('] ')
        elif inside_brackets or _is_ascii_letter(ch):
            pieces.append(ch)
        else:
            pieces.append(' ' + ch + ' ')

    # Joining once avoids repeated string concatenation; split()/join()
    # then collapses the padding inserted above.
    return ' '.join(''.join(pieces).split())


def process_line(line):
    """Convert one input line to its output form.

    The first whitespace-delimited field (the id) is kept unchanged; the
    remaining fields are joined with single spaces and passed through
    ``normalize_context``.

    Returns the output line including its trailing newline, or ``None``
    when *line* is empty or whitespace-only (there is no id to emit).
    """
    parts = line.split()
    if not parts:
        return None
    uttid = parts[0]
    context = normalize_context(' '.join(parts[1:]))
    return uttid + ' ' + context + '\n'


def main(argv):
    """Read ``argv[1]`` and write the re-spaced lines to ``argv[2]``.

    Both files are opened as UTF-8. Blank input lines are skipped instead
    of aborting the run. Exits with a usage message when fewer than two
    file arguments are supplied.
    """
    if len(argv) < 3:
        sys.exit('Usage: {} <text_in> <text_out>'.format(argv[0] if argv else 'script'))

    text_in = argv[1]
    text_out = argv[2]

    with open(text_in, 'r', encoding='utf-8') as tr, \
            open(text_out, 'w', encoding='utf-8') as tw:
        for line in tr:
            converted = process_line(line)
            if converted is not None:
                tw.write(converted)


if __name__ == '__main__':
    main(sys.argv)
